#######################################################################
##### specification curve analyses - non-continuous treatment #########
#######################################################################

rm(list=ls())
# working directory should be folder extr_weather_rep:
getwd()

# Libraries
library(tidyverse)
library(rio)
library(lfe)
library(grDevices)

# for specification curve analysis
source("code/spec_chart_function.R")

# 0. import data ####
resp <- import("data/respondents_final.csv")

# exclude all respondents that are outside of Switzerland
resp <- dplyr::filter(resp, ID != 39)
resp <- dplyr::filter(resp, ID != 1000)
resp <- dplyr::filter(resp, ID != 1206)
resp <- dplyr::filter(resp, ID != 2117)

# variables
n_start <- which(names(resp) == "temp_extreme_w1w4_percent")
n_end <- which(names(resp) == "dis_binary_w1w4_summer")
treat <- names(resp[,c(n_start:n_end)])

# get variation in independent variables
quart1 <- apply(resp[,n_start:n_end], 2, quantile, 0.25)
quart3 <- apply(resp[,n_start:n_end], 2, quantile, 0.75)
rm(n_start); rm(n_end)

# turn weather variables into factor variables depending on distribution
# turn into three categories: low, middle, high
treat_sel <- treat[1:18]

# turn into two categories: low and high
treat_sel2 <- treat[c(19,20)]

# rest is again 3 categories
treat_sel3 <- treat[21:32]

# turn weather variables into factor variables with 3 or 2 categories
resp <- resp %>% 
  mutate(across(treat_sel, 
                ~cut(.x, breaks = c(0, quantile(.x, 0.34),
                                    quantile(.x, 0.67), 100),
                     labels=c('low', 'middle', 'high'),
                     include.lowest = T),
                .names = "{.col}_fact")) %>%
  mutate(across(treat_sel2, 
                ~cut(.x, breaks = c(0, quantile(.x, 0.5),
                                    100),
                     labels=c('low', 'high'),
                     include.lowest = T),
                .names = "{.col}_fact")) %>% 
  mutate(across(treat_sel3, 
                ~cut(.x, breaks = c(0, quantile(.x, 0.34),
                                    quantile(.x, 0.67), 100),
                     labels=c('low', 'middle', 'high'),
                     include.lowest = T),
                .names = "{.col}_fact")) 

resp$dis_binary_w1w4_fact <- base::as.factor(resp$dis_binary_w1w4)
levels(resp$dis_binary_w1w4_fact) <- c("low", "high")

resp$dis_binary_w1w4_summer_fact <- base::as.factor(resp$dis_binary_w1w4_summer)
levels(resp$dis_binary_w1w4_summer_fact) <- c("low", "high")

# we now focus on the factor treatment variables
n_start <- which(names(resp) == "temp_extreme_w1w4_percent_fact")
n_end <- which(names(resp) == "dis_binary_w1w4_summer_fact")
treat_fact <- names(resp[,c(n_start:n_end)])
rm(treat); rm(treat_sel); rm(treat_sel2)

#######################################
# One plot per step ####
#######################################

# Set-up ###
graph_height <- 10

operat <- import("data/operationalisations.xlsx")
treat_labels <- operat[,2] %>% as.vector()
# treat_labels <- str_remove(treat_fact,"extreme_") %>% str_remove(.,"_percent") %>% str_remove(.,"_w1w4") %>% str_remove(., "_fact")
treat_labels_list <- list("Temperature:" = treat_labels[1:20],
                          "Precipitation:" = treat_labels[21:28],
                          "Events:" = treat_labels[29:34])

## Step 4 - factorised ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("ID", treat_fact, "policy_pref_w4", "policy_pref_w1")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = policy_pref_w4,
                                         outcome_w1 = policy_pref_w1))
# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat_fact), funs(str_c(treat_fact, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat_fact)) {
  # create empty variable to fill
  resp_step4$new_variable <- factor("low")
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat_fact[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !ID,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))

resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$ID <- as.character(resp_step4_long$ID)

# 1. Baseline regression

# dependent variable in combination with all treatment variables
flist <- lapply(treat_fact, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(ID) | 0 | ID")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef_middle=coef(reg)[1], 
             coef_high=coef(reg)[2], 
             se_middle=sqrt(diag(vcov(reg)))[1], 
             se_high =sqrt(diag(vcov(reg)))[2],
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

# effect of medium
regs1$l1_middle <-  regs1$coef_middle - qnorm(0.975)*regs1$se_middle
regs1$h1_middle <-  regs1$coef_middle + qnorm(0.975)*regs1$se_middle
# significant if both are negative or both are positive
regs1$sig_middle <- 0 
regs1$sig_middle[regs1$l1_middle < 0 & regs1$h1_middle < 0 |regs1$l1_middle > 0 & regs1$h1_middle > 0] <- 1
regs1$l1_middle <- NULL
regs1$h1_middle <- NULL

# effect of high
regs1$l1_high <-  regs1$coef_high - qnorm(0.975)*regs1$se_high
regs1$h1_high <-  regs1$coef_high + qnorm(0.975)*regs1$se_high
# significant if both are negative or both are positive
regs1$sig_high <- 0 
regs1$sig_high[regs1$l1_high < 0 & regs1$h1_high < 0 |regs1$l1_high > 0 & regs1$h1_high > 0] <- 1
regs1$l1_high <- NULL
regs1$h1_high <- NULL

# turn into long dataframe: two observations per treatment
regs1_long <- pivot_longer(regs1,
                           cols = -c(r2, fe_w4),
                           names_to = c(".value", "contrast"),
                           values_to = c("coef", "se",
                                         "sig"),
                           names_sep = "_")

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat_fact, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat_fact)
# repeat each row twice
regs2_long <- regs2[rep(seq_len(nrow(regs2)), each = 2), ] 

data <- cbind(regs1_long,regs2_long)
rm(regs1, regs2, regs1_long, regs2_long)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL
# tricky: we need to throw out 4 significances of the models that actually only have two factors, not three.
sig <- sig[c(1:36, 37, 39, 41:65, 67)]

# remove contrast (always middle, then high)
data$contrast <- NULL
data$fe_w4 <- NULL

# 4. Labels
head(data)

# only complete data
data <- data[complete.cases(data)==T,]

# Enter labels in order they appear in table
# four variables have only one estimand, defined earlier
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_ordered_factor.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       #order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.4,1),
       leftmargin = 13,
       n = 2)

dev.off()
